************************************************
***Title: merge.do
***Creators: Joelle Abramowitz, Shooshan Danagoulian, and Owen Fleming*
***Notes: This file merges all of the cleaned data at the county-date level and then creates several variables to construct the final analytic dataset.

*For questions, contact
*Owen Fleming
*hg3490@wayne.edu
************************************************


**********SETUP
clear all


**********MERGES
*Start from the pollen data; it is the master dataset for every merge below
use data/pollen/pollen.dta, clear

*Attach the NVDRS suicide data by county and date.
*keep(master match) discards NVDRS rows that have no pollen record, and
*nogenerate suppresses the _merge variable so nothing needs dropping afterwards.
*The coverage-overlap restriction and the zero-filling are done in the steps that follow.
merge 1:m county date using data/nvdrs/nvdrs, keep(master match) nogenerate

*Restrict the sample to the state-year windows listed below (per the merge note above,
*the overlap between pollen and NVDRS coverage). Nothing after 2018 is kept.
*Rows with a missing year are also removed: missing compares larger than 2018 and
*inrange() is false for missing.
drop if year > 2018
drop if statename == "CALIFORNIA" & !inrange(year,2016,2018)
drop if statename == "CONNECTICUT" & !inrange(year,2014,2018)
drop if statename == "DELAWARE" & !inrange(year,2016,2018)
*The original condition tested only the misspelling "DISTRICT OF COLOMBIA". Both spellings
*are accepted here so the restriction applies whichever form statename actually carries.
drop if inlist(statename, "DISTRICT OF COLOMBIA", "DISTRICT OF COLUMBIA") & !inrange(year,2016,2018)
drop if statename == "IDAHO" & year != 2018
drop if statename == "ILLINOIS" & !inrange(year,2014,2018)
drop if statename == "INDIANA" & !inrange(year,2014,2018)
drop if statename == "IOWA" & !inrange(year,2014,2018)
drop if statename == "KANSAS" & !inrange(year,2014,2018)
drop if statename == "MICHIGAN" & !inrange(year,2009,2018)
drop if statename == "MISSOURI" & !inrange(year,2016,2018)
drop if statename == "NEBRASKA" & !inrange(year,2016,2018)
drop if statename == "NEVADA" & !inrange(year,2016,2018)
drop if statename == "NEW YORK" & !inrange(year,2014,2018)
drop if statename == "OHIO" & !inrange(year,2009,2018)
drop if statename == "PENNSYLVANIA" & !inrange(year,2014,2018)
drop if statename == "TENNESSEE" & year != 2018
drop if statename == "TEXAS" & year != 2018
drop if statename == "WASHINGTON" & !inrange(year,2014,2018)

*Zero-fill the count variables: the total (count) and every count_* breakdown.
*Pollen rows with no matching NVDRS record come out of the merge with missing counts.
foreach v of varlist count count_* {
	replace `v' = 0 if missing(`v')
}

*Merge the county-date covariates: weather, wind, PM 2.5, and AQI.
*Each source is stored at data/<source>/<source>.dta and is matched 1:1 on county and date.
*keep(master match) discards rows found only in the using file; nogenerate suppresses _merge.
foreach src in weather wind pm2pt5 aqi {
	merge 1:1 county date using data/`src'/`src'.dta, keep(master match) nogenerate
}

*Oak masting
*Matched many-to-one on county_measurement and year
merge m:1 county_measurement year using data/oak_masting/oak_masting.dta, keep(master match) nogenerate


**********MORE CLEANING
rename (totalpollen totalspore) (pollen spore)

*The total pollen measure is occasionally negative. Rebuild those totals from the
*tree and grass-weed series, then floor anything that is still negative at zero.
replace pollen = treepollen + grassweedpollen if pollen < 0
replace pollen = 0 if pollen < 0

*Where the total is zero, set both component series to zero as well
foreach part in treepollen grassweedpollen {
	replace `part' = 0 if pollen==0
}

*Finally, set missing totals to zero. This runs after the component step above,
*so the components of rows whose total was missing are left as they are.
replace pollen = 0 if missing(pollen)


**********POLLEN VARIABLE CREATION
*Logs
*ln(pollen + 1), followed by its value 1 through 7 observations earlier within county
*(observations ordered by date; a lag refers to the previous row, not necessarily the previous calendar day)
gen ln_pollen_plus1 = ln(pollen + 1)
forvalues lag = 1/7 {
	bysort county (date): gen ln_pollen_plus1_`lag' = ln_pollen_plus1[_n-`lag']
}

*Quadratics
*Squared pollen and its first two within-county lags (rows ordered by date)
gen pollen_sq = pollen^2
forvalues lag = 1/2 {
	bysort county (date): gen pollen_sq_`lag' = pollen_sq[_n-`lag']
}

*Quartile
*Quartile of pollen over the full sample, using the xtile() egen function
*(presumably from the user-written egenmore package; confirm it is installed).
*For each quartile this creates an indicator, the indicator interacted with pollen,
*and the first two within-county lags of both.
egen pollen_quartile = xtile(pollen), n(4)
forvalues q = 1/4 {
	gen pollen_q`q' = (pollen_quartile == `q')
	forvalues lag = 1/2 {
		bysort county (date): gen pollen_q`q'_`lag' = pollen_q`q'[_n-`lag']
	}
	gen pollen_qi`q' = pollen * pollen_q`q'
	forvalues lag = 1/2 {
		bysort county (date): gen pollen_qi`q'_`lag' = pollen_qi`q'[_n-`lag']
	}
}

*Season
*Label each row by calendar month: Sep-Nov fall, Dec-Feb winter, Mar-May spring, Jun-Aug summer.
*Rows with any other month value (including missing) keep an empty season.
gen season = "fall" if inlist(month, 9, 10, 11)
replace season = "winter" if inlist(month, 12, 1, 2)
replace season = "spring" if inlist(month, 3, 4, 5)
replace season = "summer" if inlist(month, 6, 7, 8)

*Seasonal quartiles
*Same construction as the overall quartiles, but the cut points are computed within season.
*For each quartile: indicator, indicator x pollen, and two within-county lags of each.
egen pollen_quartile_s = xtile(pollen), n(4) by(season)
forvalues q = 1/4 {
	gen pollen_q`q'_s = (pollen_quartile_s == `q')
	forvalues lag = 1/2 {
		bysort county (date): gen pollen_q`q'_s_`lag' = pollen_q`q'_s[_n-`lag']
	}
	gen pollen_qi`q'_s = pollen * pollen_q`q'_s
	forvalues lag = 1/2 {
		bysort county (date): gen pollen_qi`q'_s_`lag' = pollen_qi`q'_s[_n-`lag']
	}
}

*Location-season quartiles
*For each pollen/spore series, quartiles are computed within county-season cells.
*Indicators are left missing where the quartile itself is missing, and that
*missingness carries through to the interactions and lags.
foreach series in pollen treepollen grassweedpollen spore {
	egen `series'_quartile_ls = xtile(`series'), n(4) by(county season)
	forvalues q = 1/4 {
		gen `series'_q`q'_ls = (`series'_quartile_ls == `q') if !missing(`series'_quartile_ls)
		forvalues lag = 1/2 {
			bysort county (date): gen `series'_q`q'_ls_`lag' = `series'_q`q'_ls[_n-`lag']
		}
		gen `series'_qi`q'_ls = `series' * `series'_q`q'_ls
		forvalues lag = 1/2 {
			bysort county (date): gen `series'_qi`q'_ls_`lag' = `series'_qi`q'_ls[_n-`lag']
		}
	}
}

*Moving average
*Mean of seven consecutive earlier observations within county (rows ordered by date),
*computed for each series over four windows:
*  _ma    observations 1 to 7 back
*  _ma_1  observations 2 to 8 back
*  _ma_2  observations 3 to 9 back
*  _ma_7  observations 8 to 14 back
*If any term in a window is missing (including the first rows of each county), the average is missing.
*The window expression is assembled in a loop so every term refers to the same series.
*The hand-written version used total pollen for the sixth term of every series' _ma.
foreach j in pollen treepollen grassweedpollen spore {
	foreach shift in 0 1 2 7 {
		local first = `shift' + 1
		local last = `shift' + 7

		local window `j'[_n-`first']
		local next = `first' + 1
		forvalues k = `next'/`last' {
			local window `window' + `j'[_n-`k']
		}

		local suffix
		if `shift' > 0 {
			local suffix _`shift'
		}

		bysort county (date): gen `j'_ma`suffix' = (`window')/7
	}
}


**********WEATHER VARIABLE CREATION
*Precipitation indicator
*Stata treats a missing value as larger than any number, so (ppt > 0) on its own
*would code rows with missing precipitation as 1. Leave the indicator missing there instead.
gen any_precip = (ppt > 0) if !missing(ppt)

*Quadratics
*Squared terms for the temperature, precipitation, and wind speed variables
foreach v in tmax tmin tmean ppt wind_speed {
	gen `v'_sq = `v'^2
}

*Location-season quartiles
*Quartiles of wind speed, AQI, and PM 2.5 within county-season cells, with one
*indicator per quartile. Indicators stay missing where the quartile is missing.
foreach series in wind_speed aqi pm2pt5 {
	egen `series'_quartile_ls = xtile(`series'), n(4) by(county season)
	forvalues q = 1/4 {
		gen `series'_q`q'_ls = (`series'_quartile_ls == `q') if !missing(`series'_quartile_ls)
	}
}

*Wind direction indicators
*Four 90-degree bins: [0,90], (90,180], (180,270], (270,360].
*Rows with missing wind_dir are left missing in every indicator. Without the guard they
*would be coded 0 in all four and be indistinguishable from the omitted category.
gen wind_dir_0_90 = (wind_dir >= 0 & wind_dir <= 90) if !missing(wind_dir)
gen wind_dir_90_180 = (wind_dir > 90 & wind_dir <= 180) if !missing(wind_dir)
gen wind_dir_180_270 = (wind_dir > 180 & wind_dir <= 270) if !missing(wind_dir)
gen wind_dir_270_360 = (wind_dir > 270 & wind_dir <= 360) if !missing(wind_dir)


**********FIXED EFFECTS CREATION
*More time variables
gen dow = dow(date)
gen week = week(date)

*Keep the string season label under a new name so that season can hold a numeric group id
rename season season_name
egen season = group(season_name)

*Fixed effects
*Every group id is named after the variables that define it, joined by underscores
*(for example county_year_week = group(county year week)). The key list is therefore
*recovered from the name by turning underscores into spaces.
foreach fe in year_month year_week county_year_week county_week ///
	county_season county_season_year season_year season_week ///
	county_month_day county_year_month county_month_dow month_day ///
	county_year county_month county_dow month_dow {
	local keys : subinstr local fe "_" " ", all
	egen `fe' = group(`keys')
}


**********SAVE
*Write the dataset in memory to data/for_analysis, overwriting any existing file of that name
save data/for_analysis, replace
















